In [1]:
# -*- coding: UTF-8 -*-
import pandas as pd
import numpy as np
In [2]:
# Read the source CSV (decoded as UTF-8) into a DataFrame
results_csv_path = "data/results-makers-40.csv"
data = pd.read_csv(results_csv_path, encoding="utf-8")
In [3]:
# Check data
#data  # Displays the loaded frame; use data.head() to show only the first rows
In [4]:
# Collect the column labels of the loaded frame into a plain Python list
columns = [column_name for column_name in data.columns.values]
In [5]:
# Columns we do not want to export
del_columns = [u'Unnamed: 0',
               u'id',
               u'submitdate',
               u'lastpage',
               u'startlanguage',
               u'startdate',
               u'datestamp',
               u'ipaddr',
               u'Q002'
               ]

# Fail fast when the loaded data lacks one of the columns listed above; the
# previous list.index() lookup raised ValueError in the same situation.
missing_columns = [name for name in del_columns if name not in data.columns]
if missing_columns:
    raise ValueError("columns to delete not found in data: %r" % (missing_columns,))

# Rebuild the list from `data` instead of deleting entries from `columns` in
# place, so that re-running this cell yields the same result rather than
# raising ValueError because the entries were already removed.
columns = [name for name in data.columns if name not in del_columns]
In [6]:
# Keep only the columns selected for export, then put the rows in a random order.
# Row-shuffling approach learnt here: http://stackoverflow.com/a/15772330/2237113
data_export = data[columns]
shuffled_labels = np.random.permutation(data_export.index)
sorted_data_export = data_export.reindex(shuffled_labels)
In [7]:
# Replace every index label of the shuffled frame with its row position, for
# more anonymization... for all the anonymized data except business models data
new_index = {
    old_label: position
    for position, old_label in enumerate(sorted_data_export.index)
}
sorted_data_anonymized_final = sorted_data_export.rename(index=new_index)
In [8]:
# Write the anonymized frame to disk as a UTF-8 encoded CSV file
export_path = 'data/makersinquiry-italy-2014.csv'
sorted_data_anonymized_final.to_csv(export_path, encoding='utf-8')